This notebook…
This notebook requires…
# tidyverse packages
library(dplyr) # data wrangling
library(readr) # read/write tabular data
library(stringr) # work with strings
library(tidyr) # data wrangling
# spatial packages
library(janitor) # data wrangling
library(sf) # spatial data tools
library(mapview) # projections
library(tigris) # census data wrangling
# other packages
library(here) # file path management
library(naniar) # missing data
library(ggplot2)
library(viridis) # palettes
library(leaflet)
library(tidycensus)
library(sp)
library(ggplot2)
library(RColorBrewer) # color palettes
library(viridis) # color palettes
Loading asthma data from the MoPhim database; this file covers 2015.
# Read the raw 2015 asthma export. The leading non-data rows are parsed as
# ordinary rows here and dropped in the next step.
asthma_15 <- here("data", "raw", "asthma", "asthma2015.csv") %>%
  read_csv()
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2015.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2015.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2015.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2015.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2015.csv'
... ... ......... ......... ...................................................................................................................
See problems(...) for more details.
Getting rid of first three rows bc they are pointless text.
# Keep only rows 4 through 1029; everything before and after is discarded.
asthma_15 <- slice(asthma_15, 4:1029)
Getting rid of the third column, which has no data. Also renaming the remaining columns to ‘count’ and ‘zip’.
# Drop the empty filler column and give the two remaining columns short names.
asthma_15 <- asthma_15 %>%
  select(-X3) %>%
  rename(
    zip = `Title:`,
    count = `Missouri EPHT Asthma`
  )
Changing X to NA, making the count variable numeric
# Counts recorded as "x" become NA, then the column is converted from
# character to numeric.
asthma_15 <- asthma_15 %>%
  mutate(
    count = na_if(count, "x"),
    count = as.numeric(count)
  )
`
loading Mo zip codes
# Download the 2010 ZCTA boundaries for Missouri (full-detail, not the
# cartographic-boundary version) as an sf object.
moZip <- zctas(
  state = "Missouri",
  year = 2010,
  cb = FALSE,
  class = "sf"
)
Using FIPS code '29' for state 'Missouri'
ZCTAs can take several minutes to download. To cache the data and avoid re-downloading in future R sessions, set `options(tigris_use_cache = TRUE)`
Changing object to SF
# Coerce to sf (the download above already requested class = "sf").
moZip <- st_as_sf(moZip)
cleaning names, selecting just cols for zip and geometry, changing col name to zip
# Standardise column names, then keep only the ZCTA code (renamed to `zip`)
# and the geometry.
moZip <- moZip %>%
  clean_names() %>%
  select(zip = zcta5ce10, geometry)
loading data for asthma 2014
# Read the raw 2014 asthma export; non-data rows are removed in the next step.
asthma_14 <- here("data", "raw", "asthma", "asthma2014.csv") %>%
  read_csv()
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2014.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2014.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2014.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2014.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2014.csv'
... ... ......... ......... ...................................................................................................................
See problems(...) for more details.
getting rid of first three rows bc they have nothing in them
# Keep only rows 4 through 1029.
asthma_14 <- slice(asthma_14, 4:1029)
Getting rid of the empty third column and renaming the remaining columns.
# Drop the empty filler column and give the two remaining columns short names.
asthma_14 <- asthma_14 %>%
  select(-X3) %>%
  rename(
    zip = `Title:`,
    count = `Missouri EPHT Asthma`
  )
changing X to NA, changing to numeric
# Counts recorded as "x" become NA, then the column is converted to numeric.
asthma_14 <- asthma_14 %>%
  mutate(
    count = na_if(count, "x"),
    count = as.numeric(count)
  )
Left join the 2014 counts to the 2015 counts by zip. The table now has data for both 2014 and 2015.
# Both inputs carry a `count` column, so the join yields count.x (2015) and
# count.y (2014).
asthma_full <- asthma_15 %>%
  left_join(asthma_14, by = "zip")
loading asthma data for 2013
# Read the raw 2013 asthma export; non-data rows are removed in the next step.
asthma_13 <- here("data", "raw", "asthma", "asthma2013.csv") %>%
  read_csv()
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2013.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2013.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2013.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2013.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2013.csv'
... ... ......... ......... ...................................................................................................................
See problems(...) for more details.
Getting rid of the first three rows because they have no data.
# Keep only rows 4 through 1029.
asthma_13 <- slice(asthma_13, 4:1029)
getting rid of third col, renaming other cols
# Drop the empty filler column; name the count column after its year.
asthma_13 <- asthma_13 %>%
  select(-X3) %>%
  rename(
    zip = `Title:`,
    count_13 = `Missouri EPHT Asthma`
  )
changing X to NA, changing count to numeric
# Counts recorded as "x" become NA, then the column is converted to numeric.
asthma_13 <- asthma_13 %>%
  mutate(
    count_13 = na_if(count_13, "x"),
    count_13 = as.numeric(count_13)
  )
Left joining to the combined 2014–2015 table (asthma_full), so the table now has data for 2013, 2014, and 2015.
# Append the 2013 counts to the running table.
asthma_full <- asthma_full %>%
  left_join(asthma_13, by = "zip")
loading data for 2012
# Read the raw 2012 asthma export; non-data rows are removed in the next step.
asthma_12 <- here("data", "raw", "asthma", "asthma2012.csv") %>%
  read_csv()
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2012.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2012.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2012.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2012.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2012.csv'
... ... ......... ......... ...................................................................................................................
See problems(...) for more details.
Getting rid of the first three rows, which hold no data.
# Keep only rows 4 through 1029.
asthma_12 <- slice(asthma_12, 4:1029)
cleaning data
# Drop the empty filler column; name the count column after its year.
asthma_12 <- asthma_12 %>%
  select(-X3) %>%
  rename(
    zip = `Title:`,
    count_12 = `Missouri EPHT Asthma`
  )
changing X to NA and changing count to numeric
# Counts recorded as "x" become NA, then the column is converted to numeric.
asthma_12 <- asthma_12 %>%
  mutate(
    count_12 = na_if(count_12, "x"),
    count_12 = as.numeric(count_12)
  )
Left joining to asthma_full by zip. The table now has data for 2012 through 2015.
# Append the 2012 counts to the running table.
asthma_full <- asthma_full %>%
  left_join(asthma_12, by = "zip")
loading data for 2011
# Read the raw 2011 asthma export; non-data rows are removed in the next step.
asthma_11 <- here("data", "raw", "asthma", "asthma2011.csv") %>%
  read_csv()
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2011.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2011.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2011.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2011.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/raw/asthma/asthma2011.csv'
... ... ......... ......... ...................................................................................................................
See problems(...) for more details.
Getting rid of the first three rows, which hold no data.
# Keep only rows 4 through 1029.
asthma_11 <- slice(asthma_11, 4:1029)
cleaning data
# Drop the empty filler column; name the count column after its year.
asthma_11 <- asthma_11 %>%
  select(-X3) %>%
  rename(
    zip = `Title:`,
    count_11 = `Missouri EPHT Asthma`
  )
changing X to NA, and changing count to be numeric
# Counts recorded as "x" become NA, then the column is converted to numeric.
asthma_11 <- asthma_11 %>%
  mutate(
    count_11 = na_if(count_11, "x"),
    count_11 = as.numeric(count_11)
  )
Joining to asthma_full, so the table covers 2011–2015.
# Append the 2011 counts to the running table.
asthma_full <- asthma_full %>%
  left_join(asthma_11, by = "zip")
joining data with geometry by zip
# Attach the yearly counts to the ZCTA geometries. moZip is on the left, so
# every Missouri ZCTA is kept and the result stays an sf object. The suffixed
# columns from the 2015/2014 join get year-specific names.
asthma_full <- moZip %>%
  left_join(asthma_full, by = "zip") %>%
  rename(
    count_15 = count.x,
    count_14 = count.y
  )
filtering for Zip codes in St Louis Metro Area, first filter gets range, Exclude gets rid of zip codes in that range that aren’t included in the metro area.
# Restrict to the ZIP range that contains the St. Louis metro area. `zip` is a
# character column, so the bounds are written as strings rather than relying
# on implicit numeric-to-character coercion.
asthma_full <- asthma_full %>%
  filter(zip >= "63005", zip <= "63390")

# ZIP codes inside that range that are not part of the metro area. Stored as
# character to match `zip`; `%in%` also matches these against numeric input.
exclude <- c(
  "63091", "63155", "63333", "63334", "63336", "63339", "63344",
  "63345", "63350", "63351", "63352", "63353", "63359", "63361",
  "63363", "63382", "63388", "63384", "63036", "63087", "63330"
)

asthma_full <- asthma_full %>%
  filter(!zip %in% exclude)
changing NA’s to 0s
# Replace missing yearly counts with 0 so they can be summed.
asthma_full <- asthma_full %>%
  mutate(
    count_11 = coalesce(count_11, 0),
    count_12 = coalesce(count_12, 0),
    count_13 = coalesce(count_13, 0),
    count_14 = coalesce(count_14, 0),
    count_15 = coalesce(count_15, 0)
  )
Creating count for all 5 years
# Five-year total per ZIP. Plain vectorised addition gives the row-wise sum
# directly (NAs were already replaced by 0), so no group_by(zip) is needed and
# the result is not left grouped for later steps.
asthma_full <- asthma_full %>%
  mutate(
    total_count = count_15 + count_14 + count_13 + count_12 + count_11
  ) %>%
  select(zip, total_count, geometry)
Getting census data for the population of each ZIP Code Tabulation Area, using the 2011–2015 five-year ACS estimates.
# To browse the available ACS variables, uncomment:
# acs <- load_variables(year = 2015, dataset = "acs5", cache = TRUE)

# Variable B01003_001 for every ZCTA, from the 2011-2015 five-year ACS.
pop <- get_acs(
  geography = "zip code tabulation area",
  variables = "B01003_001",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
filtering for zip codes in st. louis metro area
# Apply the same metro-area restriction used for the asthma table, then keep
# just the ZIP code and its population estimate.
pop <- pop %>%
  filter(GEOID >= "63005", GEOID <= "63390") %>%
  filter(!as.character(GEOID) %in% exclude) %>%
  select(zip = GEOID, pop = estimate)
Left join pop to asthma_full, then create a variable for the asthma rate per 1,000 residents.
# Asthma count per 1,000 residents, using the five-year total count. ZIPs with
# a zero population get NA instead of Inf/NaN from a division by zero; ZIPs
# with no population record stay NA as before.
asthma_full <- asthma_full %>%
  left_join(pop, by = "zip") %>%
  mutate(
    asthma_rate = if_else(pop > 0, total_count / pop * 1000, NA_real_)
  ) %>%
  select(zip, asthma_rate, geometry)
# Interactive preview of the ZIP-level layer.
mapview::mapview(asthma_full)
creating ggplot map
# Static choropleth of the asthma rate, Greens palette with the scale reversed.
ggplot(data = asthma_full) +
  geom_sf(mapping = aes(fill = asthma_rate)) +
  scale_fill_distiller(palette = "Greens", trans = "reverse")
# Create the output folder. recursive = TRUE also creates missing parent
# folders, and showWarnings = FALSE keeps re-runs quiet when it already exists.
dir.create(
  here("data", "clean", "asthma", "asthma_full"),
  showWarnings = FALSE,
  recursive = TRUE
)
'/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/asthma/asthma_full' already exists
# Write the asthma-rate layer as a shapefile. An existing file is replaced;
# deletion is only requested when the file is actually there, which avoids the
# GDAL "does not appear to be a file or directory" error on a first run.
asthma_shp <- here("data", "clean", "asthma", "asthma_full", "asthma_full.shp")
st_write(asthma_full, dsn = asthma_shp, delete_dsn = file.exists(asthma_shp))
Field names abbreviated for ESRI Shapefile driver
Deleting source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/asthma/asthma_full/asthma_full.shp' using driver `ESRI Shapefile'
Writing layer `asthma_full' to data source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/asthma/asthma_full/asthma_full.shp' using driver `ESRI Shapefile'
features: 127
fields: 2
geometry type: Multi Polygon
loading data for year 2015 (five year estimate)
# Variable B02009_001 for every ZCTA, from the 2011-2015 five-year ACS.
aa <- get_acs(
  geography = "zip code tabulation area",
  variables = "B02009_001",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
# Use `zip` as the key name so it matches the other tables.
aa <- rename(aa, zip = GEOID)
Joining with pop data
# Attach the total population for each ZIP.
aa <- aa %>%
  left_join(pop, by = "zip")
normalizing by pop
# Share of the population counted by B02009_001. Despite the name,
# `percent_aa` is a proportion (estimate / pop), not multiplied by 100.
aa <- aa %>%
  mutate(percent_aa = estimate / pop) %>%
  rename(african_american = estimate)
join with asthma data
# Put the demographic columns onto the asthma layer and keep only the fields
# that are written out.
aa <- asthma_full %>%
  left_join(aa, by = "zip") %>%
  select(zip, asthma_rate, african_american, pop, percent_aa)
Saving a shapefile for % African American.
# Create the output folder (quietly, including any missing parents) and write
# the layer. Deletion of an existing shapefile is only requested when the file
# exists, which avoids the GDAL "does not appear to be a file or directory"
# error on a first run.
aa_dir <- here("data", "clean", "demo", "precent_aa")
dir.create(aa_dir, showWarnings = FALSE, recursive = TRUE)
aa_shp <- file.path(aa_dir, "precent_aa.shp")
st_write(aa, dsn = aa_shp, delete_dsn = file.exists(aa_shp))
Field names abbreviated for ESRI Shapefile driverGDAL Error 1: /Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/precent_aa/precent_aa.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/precent_aa/precent_aa.shp' failed
Writing layer `precent_aa' to data source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/precent_aa/precent_aa.shp' using driver `ESRI Shapefile'
features: 127
fields: 5
geometry type: Multi Polygon
loading five year estimate for 2015
# Variable B06011_001 for every ZCTA, from the 2011-2015 five-year ACS.
median_income <- get_acs(
  geography = "zip code tabulation area",
  variables = "B06011_001",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
# Use `zip` as the key name so it matches the other tables.
median_income <- rename(median_income, zip = GEOID)
join with pop data
# Attach the total population for each ZIP.
median_income <- median_income %>%
  left_join(pop, by = "zip")
normalize by pop
# NOTE(review): `income_precent` divides the ACS estimate by population. If
# the estimate is a median dollar amount, this ratio has no clear
# interpretation -- confirm it is wanted before using it in analysis.
median_income <- median_income %>%
  mutate(income_precent = estimate / pop) %>%
  rename(median_income = estimate)
join with asthma data
# Put the income columns onto the asthma layer and keep only the fields that
# are written out.
median_income <- asthma_full %>%
  left_join(median_income, by = "zip") %>%
  select(zip, asthma_rate, median_income, pop, income_precent)
saving as shapefile
# Create the output folder (quietly, including any missing parents) and write
# the layer. Deletion of an existing shapefile is only requested when the file
# exists, which avoids the GDAL error on a first run.
income_dir <- here("data", "clean", "demo", "med_income")
dir.create(income_dir, showWarnings = FALSE, recursive = TRUE)
income_shp <- file.path(income_dir, "med_income.shp")
st_write(median_income, dsn = income_shp, delete_dsn = file.exists(income_shp))
Field names abbreviated for ESRI Shapefile driverGDAL Error 1: /Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/med_income/med_income.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/med_income/med_income.shp' failed
Writing layer `med_income' to data source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/med_income/med_income.shp' using driver `ESRI Shapefile'
features: 127
fields: 5
geometry type: Multi Polygon
loading 5 year estimate from 2015
# Variable B25077_001 for every ZCTA, from the 2011-2015 five-year ACS.
home_value <- get_acs(
  geography = "zip code tabulation area",
  variables = "B25077_001",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
# Use `zip` as the key name so it matches the other tables.
home_value <- rename(home_value, zip = GEOID)
joining with pop data
# Attach the total population for each ZIP.
home_value <- home_value %>%
  left_join(pop, by = "zip")
normalizing by pop
# NOTE(review): `value_percent` divides the ACS estimate by population. If the
# estimate is a median dollar value, this ratio has no clear interpretation --
# confirm it is wanted before using it in analysis.
home_value <- home_value %>%
  mutate(value_percent = estimate / pop) %>%
  rename(home_value = estimate)
joining with asthma data
# Put the home-value columns onto the asthma layer and keep only the fields
# that are written out.
home_value <- asthma_full %>%
  left_join(home_value, by = "zip") %>%
  select(zip, asthma_rate, home_value, pop, value_percent)
saving as shapefile
# Create the output folder (quietly, including any missing parents) and write
# the layer. Deletion of an existing shapefile is only requested when the file
# exists, which avoids the GDAL error on a first run.
home_value_dir <- here("data", "clean", "demo", "home_value")
dir.create(home_value_dir, showWarnings = FALSE, recursive = TRUE)
home_value_shp <- file.path(home_value_dir, "home_value.shp")
st_write(
  home_value,
  dsn = home_value_shp,
  delete_dsn = file.exists(home_value_shp)
)
Field names abbreviated for ESRI Shapefile driverGDAL Error 1: /Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/home_value/home_value.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/home_value/home_value.shp' failed
Writing layer `home_value' to data source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/home_value/home_value.shp' using driver `ESRI Shapefile'
features: 127
fields: 5
geometry type: Multi Polygon
loading five year estimate from 2015
# Variable B992707_001 for every ZCTA, from the 2011-2015 five-year ACS.
# NOTE(review): B99-prefixed ACS tables are, as far as I know, allocation
# (imputation) tables -- confirm this is the intended Medicaid coverage count.
medicaid <- get_acs(
  geography = "zip code tabulation area",
  variables = "B992707_001",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
# Use `zip` as the key name so it matches the other tables.
medicaid <- rename(medicaid, zip = GEOID)
joining with pop
# Attach the total population for each ZIP.
medicaid <- medicaid %>%
  left_join(pop, by = "zip")
normalizing by pop
# Despite the name, `percent_medicaid` is a proportion (estimate / pop), not
# multiplied by 100.
medicaid <- medicaid %>%
  mutate(percent_medicaid = estimate / pop) %>%
  rename(medicaid = estimate)
joining with asthma data
# Put the Medicaid columns onto the asthma layer and keep only the fields that
# are written out.
medicaid <- asthma_full %>%
  left_join(medicaid, by = "zip") %>%
  select(zip, asthma_rate, medicaid, pop, percent_medicaid)
saving as shapefile
# Create the output folder (quietly, including any missing parents) and write
# the layer. Deletion of an existing shapefile is only requested when the file
# exists, which avoids the GDAL error on a first run.
medicaid_dir <- here("data", "clean", "demo", "medicaid")
dir.create(medicaid_dir, showWarnings = FALSE, recursive = TRUE)
medicaid_shp <- file.path(medicaid_dir, "medicaid.shp")
st_write(medicaid, dsn = medicaid_shp, delete_dsn = file.exists(medicaid_shp))
Field names abbreviated for ESRI Shapefile driverGDAL Error 1: /Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/medicaid/medicaid.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/medicaid/medicaid.shp' failed
Writing layer `medicaid' to data source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/medicaid/medicaid.shp' using driver `ESRI Shapefile'
features: 127
fields: 5
geometry type: Multi Polygon
loading five year estimates for 2015
# Variable B17001_002 for every ZCTA, from the 2011-2015 five-year ACS; used
# below as the numerator of the poverty share.
poverty <- get_acs(
  geography = "zip code tabulation area",
  variables = "B17001_002",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
loading poverty total
# Variable B17001_001 for every ZCTA, from the 2011-2015 five-year ACS; used
# below as the denominator of the poverty share.
poverty_total <- get_acs(
  geography = "zip code tabulation area",
  variables = "B17001_001",
  year = 2015,
  survey = "acs5"
)
Getting data from the 2011-2015 5-year ACS
# Keep only the key and estimate, then join the totals. Both tables have an
# `estimate` column, so the join produces estimate.x (from `poverty`) and
# estimate.y (from `poverty_total`).
poverty <- poverty %>%
  select(GEOID, estimate) %>%
  left_join(poverty_total, by = "GEOID")
normalizing by poverty total
# Poverty share = estimate.x / estimate.y, then give the columns final names.
# This overwrites `poverty`, so re-running this step on its own fails with
# "object 'estimate.x' not found" because the columns are already renamed.
poverty <- poverty %>%
  mutate(poverty_percent = estimate.x / estimate.y) %>%
  rename(
    zip = GEOID,
    poverty = estimate.x,
    pov_total = estimate.y
  )
Error in mutate_impl(.data, dots) :
Evaluation error: object 'estimate.x' not found.
joining with asthma data
# Put the poverty columns onto the asthma layer and keep only the fields that
# are written out.
poverty <- asthma_full %>%
  left_join(poverty, by = "zip") %>%
  select(zip, asthma_rate, poverty, pov_total, poverty_percent)
# Create the output folder (quietly, including any missing parents) and write
# the layer. Deletion of an existing shapefile is only requested when the file
# exists, which avoids the GDAL error on a first run.
poverty_dir <- here("data", "clean", "demo", "poverty")
dir.create(poverty_dir, showWarnings = FALSE, recursive = TRUE)
poverty_shp <- file.path(poverty_dir, "poverty.shp")
st_write(poverty, dsn = poverty_shp, delete_dsn = file.exists(poverty_shp))
Field names abbreviated for ESRI Shapefile driverGDAL Error 1: /Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/poverty/poverty.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/poverty/poverty.shp' failed
Writing layer `poverty' to data source `/Users/avagagner/Desktop/Courses/Spring2019/SOC4650/GitHub/redliningAsthma-Zipcode/data/clean/demo/poverty/poverty.shp' using driver `ESRI Shapefile'
features: 127
fields: 5
geometry type: Multi Polygon